TEXT SMTA
Analyse and prepare text for Semanta.


DATM 2024-05-02 00:53:30

Host 10.184.19.84 1
Host 10.184.19.84 2
Connected successfully
Host 10.184.19.84 3




Corpus
Title
Text
taal
Chatbot
Zinnen ".count($bzv1_snts); // echo "
BZV1 SNTS "; // print_r($bzv1_snts); $nbzv_text=str_replace(","," ",$nbzv_text); $nbzv_text=str_replace(")"," ",$nbzv_text); $nbzv_text=str_replace("("," ",$nbzv_text); $nbzv_text=str_replace(":"," ",$nbzv_text); $nbzv_text=str_replace("'"," ",$nbzv_text); $nbzv_text=str_replace("'"," ",$nbzv_text); $nbzv_text=str_replace("?"," ",$nbzv_text); $nbzv_text=str_replace("!"," ",$nbzv_text); $nbzv_text=str_replace("["," ",$nbzv_text); $nbzv_text=str_replace("]"," ",$nbzv_text); $nbzv_text=str_replace('"','',$nbzv_text); $uinvr_bzv1=urlencode(strtolower($nbzv_text)); // echo "
U BZV1 ".$uinvr_bzv1."
"; $uinvr_bzv1=str_replace("++", "+",$uinvr_bzv1); $uinvr_bzv1=str_replace("%40","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%93",":",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%98","'",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%99","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%C3%AB","e",$uinvr_bzv1); $uinvr_bzv1=str_replace("%C3%AF","i",$uinvr_bzv1); $uinvr_bzv1=str_replace("%C3%A9","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%9C","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%9D","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%0D%0A",".",$uinvr_bzv1); $uinvr_bzv1=str_replace("%A1%A1","a",$uinvr_bzv1); $uinvr_bzv1=str_replace("%A1","a",$uinvr_bzv1); $uinvr_bzv1=str_replace("%BA"," ",$uinvr_bzv1); $uinvr_bzv1=str_replace("%23","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%2A","",$uinvr_bzv1); $nbzv_text=urldecode($uinvr_bzv1); $nbzv_text=str_replace("...https:",".https:",$nbzv_text); $nbzv_text=str_replace("..https:",".https:",$nbzv_text); $nbzv_text=str_replace("https:",".https:",$nbzv_text); $nbzv_text=str_replace("..",".",$nbzv_text); $nbzv_text=trim($nbzv_text); // echo "
U NBZV TEXT ".urlencode($nbzv_text)."
"; $nbzv_itms = explode(" ",$nbzv_text); $bzv1_wrds=explode(".",$nbzv_text); // echo "
BZV1 WRDS"; //print_r($bzv1_wrds); $bzv1_sgmt=bepl_sgmt($nbzv_text,4); print_r($bzv1_sgmt); // echo "
Woorden ".count($bzv1_wrds)."
"; echo "
Segmenten ".count($bzv1_sgmt)."
"; //////////// foreach($bzv1_snts as $snts_key => $snts_val) { $itms_array["1-".$snts_val]=$itms_array["1-".$snts_val]+1; } foreach($bzv1_sgmt as $sgmt_key => $sgmt_val) { $itms_array["2-".$sgmt_val]=$itms_array["2-".$sgmt_val]+1; } foreach($bzv1_wrds as $wrds_key => $wrds_val) { $wrds_array[$wrds_val]=$wrds_array[$wrds_val]+1; } echo "
WRDS ARRAY"; arsort($wrds_array); print_r($wrds_array); echo "
"; // exit; // // print_r($nbzv_itms); $fw=0; $nw=0; $pw=0; $tw=0; foreach($nbzv_itms as $nbzv_key => $nbzv_val) { $itms_array[$nbzv_val]=$itms_array[$nbzv_val]+1; } arsort($itms_array); // print_r($itms_array); echo "
Itms array ".count($itms_array); $datm=date("Y-m-d H:i:s", time()); echo "
ITMS ".$datm."
"; foreach($itms_array as $bzv1_key => $bzv1_val) { // echo "
".$bzkr_val."--".$bzkr_key."
"; $bzv1_prts=explode("-",$bzkr_key); // print_r($bzv1_prts); $bzv1_val=$bzv1_prts[9]; $bzv1_key=$bzv1_prts[1]; if ($fw < 4 and strlen($bzv1_key)> 4) { // echo "pre
".strlen($bzv1_key)."--".$bzv1_key."
"; $fw=$fw+1; } $bzv1_key=trim($bzv1_key); if (strlen($bzv1_key)> 3) { echo "
BZV1 KEY ".$bzv1_key."
"; mysql_query("SET CHARACTER SET utf8"); mysql_query("SET NAMES utf8"); $sqltref="SELECT * FROM `gw27` where `lnks`= '$bzv1_key' or `domn` = '$bzv1_key' order by `rchs` asc" ; $bzv1result = mysql_query($sqltref, $link); $bzv1_num = mysql_num_rows($bzv1result); echo "
INVR CHBT ".$invr_chbt." SQL GW27 ".$sqltref." TREF NUM ".$bzv1_num; echo "
Direct grammaticaal resultaat "; if ($bzv1_num > 0) { $prst_wrds[$pw]=$bzv1_key; $pw=$pw+1; } if ($bzv1_num == 0) { $msng_wrds[$nw]=$bzv1_key; $nw=$nw+1; } if ($tw < 4 and strlen($bzv1_key)> 4) { $tw=$tw+1; $tags_wrds[$tw]=$bzv1_key; } } } echo "
PRST WRDS ".count($prst_wrds); print_r($prst_wrds); echo "
MSNG WRDS".count($msng_wrds); print_r($msng_wrds); echo "
TAGS WRDS".count($tags_wrds); print_r($tags_wrds); $datm=date("Y-m-d H:i:s", time()); echo "
controle ".$datm."
"; foreach($lines as $line) { // echo($line); } echo ""; echo "
"; echo ""; echo ""; // echo "
LINE ".$line."
"; ////////////////////////// $invr_bzv1=str_replace(" "," ",$invr_bzv1); $invr_bzv1=str_replace("?",".",$invr_bzv1); $invr_bzv1=str_replace(" > ",".xdte.",$invr_bzv1); $invr_bzv1=str_replace("!",".xutr.",$invr_bzv1); $invr_bzv1=str_replace("* ",".xitn.",$invr_bzv1); $invr_bzv1=str_replace("+ ",".xbtn.",$invr_bzv1); $invr_bzv1=str_replace("o ",".xtxt.",$invr_bzv1); $invr_bzv1=str_replace(";",".xvwz.",$invr_bzv1); $invr_bzv1=str_replace(":",".xuit.",$invr_bzv1); $invr_bzv1=str_replace(",",".xlst.",$invr_bzv1); $uinvr_bzv1=urlencode($invr_bzv1); // echo "
U BZV1 ".$uinvr_bzv1."
"; $uinvr_bzv1=str_replace("%40","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%93",":",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%98","'",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%99","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%C3%AB","e",$uinvr_bzv1); $uinvr_bzv1=str_replace("%C3%AF","i",$uinvr_bzv1); $uinvr_bzv1=str_replace("%C3%A9","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%9C","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%9D","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%0D%0A",".",$uinvr_bzv1); $uinvr_bzv1=str_replace("%A1%A1","a",$uinvr_bzv1); $uinvr_bzv1=str_replace("%A1","a",$uinvr_bzv1); $uinvr_bzv1=str_replace("%BA"," ",$uinvr_bzv1); $invr_bzv1=urldecode($uinvr_bzv1); $invr_bzv1=str_replace("...https:",".https:",$invr_bzv1); $invr_bzv1=str_replace("..https:",".https:",$invr_bzv1); $invr_bzv1=str_replace("https:",".https:",$invr_bzv1); $invr_bzv1=str_replace("..",".",$invr_bzv1); // $invr_bzv1=str_replace(" and ",".xvrb.",$invr_bzv1); $invr_bzv1=str_replace("‘",".qte.",$invr_bzv1); // $invr_bzv1=str_replace("“",".qte.",$invr_bzv1); // echo "
INVR BZV1 ".$invr_bzv1."
"; echo "
Aantal tekens ".strlen($invr_bzv1); $invr_bzv1=strtolower($invr_bzv1); $invr_bzv1=str_replace("+"," ",$invr_bzv1); $invr_bzv1=str_replace("=","betekent",$invr_bzv1); $hinvr_bzv1=str_replace(",",".",$invr_bzv1); echo "
BZV1 ".urlencode($invr_bzv1)." LENGH INVR BZV1=".strlen($invr_bzv1)." IPCD ".$invr_ipcd; $uinvr_bzv1=str_replace("%0D%0A",".",urlencode($hinvr_bzv1)); $hinvr_bzv1=urldecode($uinvr_bzv1); $bzv1_array=explode(".",$hinvr_bzv1); // ksort($bzv1_array); $brelt="xxxxxxx"; echo "
#".strlen($invr_bzv1)."
".substr($invr_bzv1,0,128)."....
"; foreach($bzv1_array as $bzv1_key => $bzv1_val) { $en=0; $sw=0; if ($bzv1_val != $brelt) { $sgmt_rslt=bepl_sgmt($bzv1_val,4); $brelt = $bzv1_val; if ($sgmt_key != $bzv1_val) { //echo ""; } // print_r($sgmt_rslt); foreach($sgmt_rslt as $sgmt_key => $sgmt_val) { $cntr_subs=substr_count($sgmt_key, ' '); if ($cntr_subs > 0 and $sgmt_key != $bzv1_val ) { $sw=$sw+1; /////////// // echo "
WE WAREN BEGONNEN INVR BZV1 ".$invr_bzv1." RCHS ".$bzv1_val." DOMN ".$sdomn_frst." LNKS ".$invr_lnks." TABL ".$invr_tabl." TAAL ".$invr_taal; $sinvr_lnks=str_replace(" ","+",$invr_lnks); $sql1 = "INSERT INTO `bzv1` (`id`,`ipad`,`domn`,`taal`,`lnks`,`rchs`,`levl`,`datm`) VALUES ('0','$invr_corp','$invr_cont','$invr_taal','$sgmt_key','$bzv1_val','$sw','$datm')"; mysql_query("SET CHARACTER SET utf8"); mysql_query("SET NAMES utf8"); echo "
".$sql1; $retval = mysql_query($sql1, $link); // echo ""; $enrc_text=$enrc_text." ".$sgmt_key; // echo ""; $en=$en+1; if ($en == 3) { $en=0; // echo ""; $enrc_rslt=enrc_text($enrc_text); // print_r($enrc_rslt); foreach($enrc_rslt as $enrc_key => $enrc_val) { echo ""; } $enrc_text=""; } } } //echo ""; } // echo ""; $vrag_dtls=explode("?",$bzv1_val); foreach($vrag_dtls as $vrag_key => $vrag_val) { $vrag_rslt[$vrag_val]=$vrag_rslt[$vrag_val]+1; } } echo "
".$bzv1_val."
".$sgmt_key."
".$bzv1_val."
".$enrc_text."
".$enrc_text."
".$enrc_key."
".$enrc_val."
".$sgmt_key."
".$bzv1_val."
"; echo "
Vragen "; foreach($vrag_rslt as $rslt_key => $rslt_val) { $srslt_key=str_replace(" ","+",$rslt_key); $lrslt_key="".$rslt_key.""; echo ""; } echo "
".$lrslt_key."
"; $taal_rslt=bepl_taal(substr($hinvr_bzv1,0,256)); arsort($taal_rslt); // print_r($taal_rslt); $taal_sw=0; foreach($taal_rslt as $taal_key => $taal_val) { if ($taal_sw == 0) { $taal_sw = 1; $text_taal=$taal_key; } } $invr_taal=$text_taal; // echo "
Teksttaal ".$text_taal." Spreek taal ".$invr_taal."
"; $e=0; //////////////////////////////////// if ($handle = opendir('./data')) { while (false !== ($entry = readdir($handle)) ) { if ($e < 128) { // echo "
ENTRY ".$entry; if ($entry != "." && $entry != "..") { $line_cntr=0; // print_r($corp_rslt); $file_dtls=explode(".",$entry); if ($corp == $entry) { $file_array[$e]= $entry; $e=$e+1; } } } } } $invr_datm=date("Y-m-d H:i:s", time()); // echo "
FILE ARRAY ".$invr_datm; asort($file_array); // print_r($file_array); echo "
AANTAL CORPORA ".count($file_array); closedir($handle); ////////////////////////////////////////////////////// // print_r($file_array); ///// read textfiles foreach($file_array as $file_key => $file_val) { echo "".$file_val.""; if ($entry != "." && $entry != "..") { $line_cntr=0; // print_r($corp_rslt); $file_dtls=explode(".",$entry); echo "".$file_val.""; $data_array[$e]= $entry; $e=$e+1; } } print_r($data_array); exit; $http_cntr=0; $base_rslt=xtrt_xxxx("----","nederlands","base"); // print_r($base_rslt); echo ""; $nc=0; foreach ($base_rslt as $base_key => $base_key) { $base_dtls=explode("!!",$base_key); $base_scre=$base_dtls[0]; $base_taal=$base_dtls[1]; $base_smdm=$base_dtls[2]; $base_corp=$base_dtls[3]; $base_vgpt=$base_dtls[4]; $base_datm=$base_dtls[5]; $base_levl=$base_dtls[6]; $base_http=$base_dtls[7]; if ($srelt !=$base_smdm) { echo "
"; // echo ""; $srelt=$base_smdm; } if ($nc < 4) { $nc=$nc+1; // echo ""; } if ($nc == 4) { echo ""; $nc=0; // echo ""; } } echo "
Praat met
".$base_smdm." over
"; exit; echo "
"; echo ""; foreach ($base_rslt as $base_key => $base_key) { $base_dtls=explode("!!",$base_key); $base_scre=$base_dtls[0]; $base_taal=$base_dtls[1]; $base_smdm=$base_dtls[2]; $base_corp=$base_dtls[3]; $base_vgpt=$base_dtls[4]; $base_datm=$base_dtls[5]; $base_levl=$base_dtls[6]; $base_http=$base_dtls[7]; if ($srelt !=$base_smdm) { echo "
"; //echo ""; $srelt=$base_smdm; } if ($nc < 4) { $nc=$nc+1; echo ""; } if ($nc == 4) { echo ""; $nc=0; echo ""; } } $line=$invr_bzv1; require_once("conv-line-bzv1.php"); $lrge_sgmt=bepl_sgmt($line,8); ksort($lrge_sgmt); $lc=0; foreach($lrge_sgmt as $lrge_key=> $lrge_val) { echo ""; // echo "
Your text is too long to be handled, please divide and tell me again"; $lrge_array[$lc]=$lrge_key; $lc=$lc+1; // echo ""; // echo "
NUMBER OF LRGE ".$lc; } // print_r($lrge_array); foreach($lrge_array as $lrge_val => $lrge_key) { echo "
Praat met
".$base_smdm." over
".urlencode($lrge_key)."
#".$lc"--".$lrge_key."
"; // echo ""; $next=$lrge_key+1; $prev=$lrge_key-1; $lrge_next=$lrge_array[$next]; $lrge_prev=$lrge_array[$prev]; if ($lrge_prev == null) { $lrge_prev="begin"; } if ($lrge_next == null) { $lrge_next="einde"; } $lrge_frst=$lrge_array[0]; $lrge_last=$lrge_array[$lc-1]; echo ""; echo ""; } /////////////////////////////// echo ""; $uinvr_bzv1=urlencode($invr_bzv1); // echo "
U BZV1 ".$uinvr_bzv1."
"; $uinvr_bzv1=str_replace("%40","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%93",":",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%98","'",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%99","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%C3%AB","e",$uinvr_bzv1); $uinvr_bzv1=str_replace("%C3%AF","i",$uinvr_bzv1); $uinvr_bzv1=str_replace("%C3%A9","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%9C","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80%9D","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%E2%80","",$uinvr_bzv1); $uinvr_bzv1=str_replace("%0D%0A",".",$uinvr_bzv1); $uinvr_bzv1=str_replace("%A1%A1","a",$uinvr_bzv1); $uinvr_bzv1=str_replace("%A1","a",$uinvr_bzv1); $uinvr_bzv1=str_replace("%BA"," ",$uinvr_bzv1); $invr_bzv1=urldecode($uinvr_bzv1); $invr_bzv1=str_replace("...https:",".https:",$invr_bzv1); $invr_bzv1=str_replace("..https:",".https:",$invr_bzv1); $invr_bzv1=str_replace("https:",".https:",$invr_bzv1); $invr_bzv1=str_replace("..",".",$invr_bzv1); // echo "
LINE ".$line."
"; ////////////////////////// $invr_bzv1=str_replace(" "," ",$invr_bzv1); $invr_bzv1=str_replace("?",".xvrg.",$invr_bzv1); $invr_bzv1=str_replace(" > ",".xdte.",$invr_bzv1); $invr_bzv1=str_replace("!",".xutr.",$invr_bzv1); $invr_bzv1=str_replace("* ",".xitn.",$invr_bzv1); $invr_bzv1=str_replace("+ ",".xbtn.",$invr_bzv1); $invr_bzv1=str_replace("o ",".xtxt.",$invr_bzv1); $invr_bzv1=str_replace(";",".xvwz.",$invr_bzv1); $invr_bzv1=str_replace(":",".xuit.",$invr_bzv1); $invr_bzv1=str_replace(",",".xlst.",$invr_bzv1); // $invr_bzv1=str_replace(" and ",".xvrb.",$invr_bzv1); $invr_bzv1=str_replace("‘",".qte.",$invr_bzv1); // $invr_bzv1=str_replace("“",".qte.",$invr_bzv1); $line=$invr_bzv1; require_once("conv-line-bzv1.php"); $lrge_sgmt=bepl_sgmt($line,8); ksort($lrge_sgmt); $lc=0; foreach($lrge_sgmt as $lrge_key=> $lrge_val) { // echo ""; // echo "
Your text is too long to be handled, please divide and tell me again"; $lrge_array[$lc]=$lrge_key; $lc=$lc+1; // echo ""; // echo "
NUMBER OF LRGE ".$lc; } print_r($lrge_array); foreach($lrge_array as $lrge_val => $lrge_key) { echo "
INVR BZV1 ".$invr_bzv1." LRGE KEY ".$lrge_key. " PREV ".$prev."--LRGE PREV ".$lrge_prev." ".$next."--LRGE NEXT ".$lrge_next."
#".strlen($invr_bzv1)."
".substr($invr_bzv1,0,128)."....
".urlencode($lrge_key)."
#".$lc"--".$lrge_key."
"; // echo ""; $next=$lrge_key+1; $prev=$lrge_key-1; $lrge_next=$lrge_array[$next]; $lrge_prev=$lrge_array[$prev]; if ($lrge_prev == null) { $lrge_prev="begin"; } if ($lrge_next == null) { $lrge_next="einde"; } $lrge_frst=$lrge_array[0]; $lrge_last=$lrge_array[$lc-1]; echo ""; echo ""; } ?>
INVR BZV1 ".$invr_bzv1." LRGE KEY ".$lrge_key. " PREV ".$prev."--LRGE PREV ".$lrge_prev." ".$next."--LRGE NEXT ".$lrge_next."